A Manhattan plot is a specific type of scatter plot widely used in genomics to study GWAS results (Genome Wide Association Study).
Each point represents a genetic variant. The X axis shows its position on a chromosome, the Y axis tells how much it is associated with a trait.
Basic Manhattan plot
library(qqman) head(gwasResults)
## SNP CHR BP P ## 1 rs1 1 1 0.9148060 ## 2 rs2 1 2 0.9370754 ## 3 rs3 1 3 0.2861395 ## 4 rs4 1 4 0.8304476 ## 5 rs5 1 5 0.6417455 ## 6 rs6 1 6 0.5190959
# Make the Manhattan plot on the gwasResults dataset manhattan(gwasResults, chr="CHR", bp="BP", snp="SNP", p="P" )
Highlight a group of SNP
snpsOfInterest
## [1] "rs3001" "rs3002" "rs3003" "rs3004" "rs3005" "rs3006" "rs3007" "rs3008" "rs3009" "rs3010" "rs3011" "rs3012" "rs3013" ## [14] "rs3014" "rs3015" "rs3016" "rs3017" "rs3018" "rs3019" "rs3020" "rs3021" "rs3022" "rs3023" "rs3024" "rs3025" "rs3026" ## [27] "rs3027" "rs3028" "rs3029" "rs3030" "rs3031" "rs3032" "rs3033" "rs3034" "rs3035" "rs3036" "rs3037" "rs3038" "rs3039" ## [40] "rs3040" "rs3041" "rs3042" "rs3043" "rs3044" "rs3045" "rs3046" "rs3047" "rs3048" "rs3049" "rs3050" "rs3051" "rs3052" ## [53] "rs3053" "rs3054" "rs3055" "rs3056" "rs3057" "rs3058" "rs3059" "rs3060" "rs3061" "rs3062" "rs3063" "rs3064" "rs3065" ## [66] "rs3066" "rs3067" "rs3068" "rs3069" "rs3070" "rs3071" "rs3072" "rs3073" "rs3074" "rs3075" "rs3076" "rs3077" "rs3078" ## [79] "rs3079" "rs3080" "rs3081" "rs3082" "rs3083" "rs3084" "rs3085" "rs3086" "rs3087" "rs3088" "rs3089" "rs3090" "rs3091" ## [92] "rs3092" "rs3093" "rs3094" "rs3095" "rs3096" "rs3097" "rs3098" "rs3099" "rs3100"
manhattan(gwasResults, highlight = snpsOfInterest)
Annotate SNPs below specified p-value
manhattan(gwasResults, annotatePval = 0.01)
Highly customisable with ggplot2
library(ggplot2)
library(dplyr)
data <- gwasResults %>%
# Compute chromosome size
group_by(CHR) %>%
summarise(chr_len=max(BP)) %>%
# Calculate cumulative position of each chromosome
mutate(tot=cumsum(chr_len)-chr_len) %>%
select(-chr_len) %>%
# Add this info to the initial dataset
left_join(gwasResults, ., by=c("CHR"="CHR")) %>%
# Add a cumulative position of each SNP
arrange(CHR, BP) %>%
mutate( BPcum=BP+tot)
head(data)
## SNP CHR BP P tot BPcum ## 1 rs1 1 1 0.9148060 0 1 ## 2 rs2 1 2 0.9370754 0 2 ## 3 rs3 1 3 0.2861395 0 3 ## 4 rs4 1 4 0.8304476 0 4 ## 5 rs5 1 5 0.6417455 0 5 ## 6 rs6 1 6 0.5190959 0 6
axisdf <- data %>% group_by(CHR) %>% summarize(center=(max(BPcum)+min(BPcum))/2) head(axisdf)
## # A tibble: 6 × 2 ## CHR center ## <int> <dbl> ## 1 1 750. ## 2 2 2096 ## 3 3 3212. ## 4 4 4204 ## 5 5 5115 ## 6 6 5966
ggplot(data, aes(x=BPcum, y=-log10(P))) +
geom_point( aes(color=as.factor(CHR)), alpha=0.8, size=1.3) +
scale_color_manual(values = rep(c("#4197d8","#f8c120","#413496","#495226", "#d60b6f","#e66519",
"#d581b7","#83d3ad", "#7c162c","#26755d"), 3)) +
scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) +
scale_y_continuous(expand = c(0, 0) ) + # remove space between plot area and x axis
theme_bw() +
theme(legend.position="none", panel.border = element_blank(),panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
)
Highlight SNPs use ggrepel
library(ggrepel)
data <- gwasResults %>%
# Compute chromosome size
group_by(CHR) %>%
summarise(chr_len=max(BP)) %>%
# Calculate cumulative position of each chromosome
mutate(tot=cumsum(chr_len)-chr_len) %>%
select(-chr_len) %>%
# Add this info to the initial dataset
left_join(gwasResults, ., by=c("CHR"="CHR")) %>%
# Add a cumulative position of each SNP
arrange(CHR, BP) %>%
mutate( BPcum=BP+tot) %>%
# Add highlight and annotation information
mutate( is_highlight=ifelse(SNP %in% snpsOfInterest, "yes", "no")) %>%
mutate( is_annotate=ifelse(-log10(P)>4, "yes", "no"))
head(data)
## SNP CHR BP P tot BPcum is_highlight is_annotate ## 1 rs1 1 1 0.9148060 0 1 no no ## 2 rs2 1 2 0.9370754 0 2 no no ## 3 rs3 1 3 0.2861395 0 3 no no ## 4 rs4 1 4 0.8304476 0 4 no no ## 5 rs5 1 5 0.6417455 0 5 no no ## 6 rs6 1 6 0.5190959 0 6 no no
# Prepare X axis axisdf <- data %>% group_by(CHR) %>% summarize(center=(max(BPcum)+min(BPcum))/2) head(axisdf)
## # A tibble: 6 × 2 ## CHR center ## <int> <dbl> ## 1 1 750. ## 2 2 2096 ## 3 3 3212. ## 4 4 4204 ## 5 5 5115 ## 6 6 5966
ggplot(data, aes(x=BPcum, y=-log10(P))) +
geom_point( aes(color=as.factor(CHR)), alpha=0.8, size=1.3) +
scale_color_manual(values = rep(c("#4197d8","#f8c120","#413496","#495226", "#d60b6f","#e66519",
"#d581b7","#83d3ad", "#7c162c","#26755d"), 3)) +
scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) +
scale_y_continuous(expand = c(0, 0) ) + # remove space between plot area and x axis
geom_point(data=subset(data, is_highlight=="yes"), color="orange", size=2) +
geom_label_repel( data=subset(data, is_annotate=="yes"), aes(label=SNP), size=2) + theme_bw() +
theme( legend.position="none", panel.border = element_blank(), panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank())
Circular version of Manhattan plot
library("CMplot")
par(mar = c(0, 0, 0, 0))
CMplot(gwasResults,plot.type="c",r=3,outward=TRUE,file.output=FALSE,chr.labels=seq(1,22),verbose=FALSE)
Compare the p-values of several traits
data(pig60K) head(pig60K)
## SNP Chromosome Position trait1 trait2 trait3 ## 1 ALGA0000009 1 52297 0.7738187 0.51194318 0.51194318 ## 2 ALGA0000014 1 79763 0.7738187 0.51194318 0.51194318 ## 3 ALGA0000021 1 209568 0.7583016 0.98405289 0.98405289 ## 4 ALGA0000022 1 292758 0.7200305 0.48887140 0.48887140 ## 5 ALGA0000046 1 747831 0.9736840 0.22096836 0.22096836 ## 6 ALGA0000047 1 761957 0.9174565 0.05753712 0.05753712
par(mar = c(0, 0, 0, 0))
CMplot(pig60K, plot.type="c", chr.labels=c(1:18,"X","Y"), r=0.1, outward=FALSE, cir.chr.h=1.3,
file.output=FALSE, verbose = FALSE)